In [8]:
import os
from pathlib import Path

import pandas as pd
# Location of the movies CSV.
# The default is the path this notebook was originally written against, so
# existing runs behave exactly as before. Set the MOVIES_CSV environment
# variable to point at a copy elsewhere instead of editing this cell.
MOVIES_CSV = Path(
    os.environ.get(
        "MOVIES_CSV",
        r"C:\Users\lenovo\Downloads\Datasets\Datasets\movies.csv",
    )
)

# Load the dataset
movies = pd.read_csv(MOVIES_CSV)

# A notebook cell only renders its LAST expression, so the bare
# `movies.head()` that used to sit here was evaluated and thrown away.
# It has been dropped; row previews live in their own cells.
# Check the columns to see what data we have
movies.columns
Out[8]:
Index(['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y',
'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres',
'imdb_rating', 'imdb_votes', 'story', 'summary', 'tagline', 'actors',
'wins_nominations', 'release_date'],
dtype='object')
In [40]:
# Preview the first entries of the 'summary' text column.
movies['summary'].head()
Out[40]:
0 Indian army special forces execute a covert op... 4 Under the 'Evening Shadows' truth often plays... 5 While fighting crimes against women in Delhi ... 11 A coming-of-age story based on the lives of st... 15 A dynamic young entrepreneur finds herself loc... Name: summary, dtype: object
In [6]:
# Preview the first rows of the full frame (all columns).
movies.head()
Out[6]:
| title_x | imdb_id | poster_path | wiki_link | title_y | original_title | is_adult | year_of_release | runtime | genres | imdb_rating | imdb_votes | story | summary | tagline | actors | wins_nominations | release_date | sentiment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Uri: The Surgical Strike | tt8291224 | https://upload.wikimedia.org/wikipedia/en/thum... | https://en.wikipedia.org/wiki/Uri:_The_Surgica... | Uri: The Surgical Strike | Uri: The Surgical Strike | 0 | 2019 | 138 | Action|Drama|War | 8.4 | 35112 | Divided over five chapters the film chronicle... | Indian army special forces execute a covert op... | NaN | Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... | 4 wins | 11 January 2019 (USA) | -0.221429 |
| 1 | Battalion 609 | tt9472208 | NaN | https://en.wikipedia.org/wiki/Battalion_609 | Battalion 609 | Battalion 609 | 0 | 2019 | 131 | War | 4.1 | 73 | The story revolves around a cricket match betw... | The story of Battalion 609 revolves around a c... | NaN | Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen... | NaN | 11 January 2019 (India) | 0.233333 |
| 2 | The Accidental Prime Minister (film) | tt6986710 | https://upload.wikimedia.org/wikipedia/en/thum... | https://en.wikipedia.org/wiki/The_Accidental_P... | The Accidental Prime Minister | The Accidental Prime Minister | 0 | 2019 | 112 | Biography|Drama | 6.1 | 5549 | Based on the memoir by Indian policy analyst S... | Explores Manmohan Singh's tenure as the Prime ... | NaN | Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S... | NaN | 11 January 2019 (USA) | 0.600000 |
| 3 | Why Cheat India | tt8108208 | https://upload.wikimedia.org/wikipedia/en/thum... | https://en.wikipedia.org/wiki/Why_Cheat_India | Why Cheat India | Why Cheat India | 0 | 2019 | 121 | Crime|Drama | 6.0 | 1891 | The movie focuses on existing malpractices in ... | The movie focuses on existing malpractices in ... | NaN | Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ... | NaN | 18 January 2019 (USA) | 0.200000 |
| 4 | Evening Shadows | tt6028796 | NaN | https://en.wikipedia.org/wiki/Evening_Shadows | Evening Shadows | Evening Shadows | 0 | 2018 | 102 | Drama | 7.3 | 280 | While gay rights and marriage equality has bee... | Under the 'Evening Shadows' truth often plays... | NaN | Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva... | 17 wins & 1 nomination | 11 January 2019 (India) | 0.000000 |
In [13]:
from textblob import TextBlob
def get_sentiment(text):
    """Return the TextBlob polarity score for ``text``.

    The input is passed through ``str()`` first, so non-string values are
    scored on their string form (a missing value becomes the text "nan")
    rather than raising.

    Parameters
    ----------
    text : object
        Value to analyse; converted with ``str()`` before scoring.

    Returns
    -------
    float
        ``TextBlob(...).sentiment.polarity`` for the converted text.
    """
    return TextBlob(str(text)).sentiment.polarity
# Score every entry of the 'summary' column and keep the result in a new
# 'sentiment' column on the same frame.
movies['sentiment'] = movies['summary'].apply(get_sentiment)
# Preview: take the leading rows first, then keep only title and score.
movies.head()[['title_x', 'sentiment']]
Out[13]:
| title_x | sentiment | |
|---|---|---|
| 0 | Uri: The Surgical Strike | -0.221429 |
| 1 | Battalion 609 | 0.233333 |
| 2 | The Accidental Prime Minister (film) | 0.600000 |
| 3 | Why Cheat India | 0.200000 |
| 4 | Evening Shadows | 0.000000 |
In [14]:
import matplotlib.pyplot as plt
# Histogram of the per-movie sentiment scores, drawn on an explicit Axes
# instead of through the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(movies['sentiment'], bins=50, color='skyblue', edgecolor='black')
ax.set_title('Sentiment Distribution of Movie Descriptions', fontsize=16)
ax.set_xlabel('Sentiment Score', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
plt.show()
In [15]:
# Scatter of summary sentiment (y) against the 'imdb_rating' column (x).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    movies['imdb_rating'],
    movies['sentiment'],
    alpha=0.6,
    color='orange',
)
ax.set_title('Sentiment vs imdb_rating', fontsize=16)
ax.set_xlabel('Movie Rating', fontsize=12)
ax.set_ylabel('Sentiment Score', fontsize=12)
plt.show()
In [16]:
# Ten rows with the highest sentiment score, best first.
top_positive_movies = movies.sort_values(by='sentiment', ascending=False).head(10)

# Horizontal bars: one per selected title, bar length = sentiment score.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(
    top_positive_movies['title_x'],
    top_positive_movies['sentiment'],
    color='green',
)
ax.set_title('Top 10 Movies with Positive Sentiment', fontsize=16)
ax.set_xlabel('Sentiment Score', fontsize=12)
ax.set_ylabel('Movie Title', fontsize=12)
plt.show()
In [17]:
# Average sentiment per individual genre.
#
# The 'genres' column stores pipe-separated combinations such as
# "Action|Drama|War". Grouping on the raw string therefore averages per
# *combination*, and the previous `.head(20)` kept only the first 20 groups
# in alphabetical order, an arbitrary subset. Split the string and explode
# it so each movie contributes once to every genre it is tagged with.
# Rows with a missing 'genres' value end up with a NaN key and are dropped
# by groupby's default behaviour.
genre_sentiment = (
    movies
    .assign(genres=lambda frame: frame['genres'].str.split('|'))
    .explode('genres')
    .groupby('genres')['sentiment']
    .mean()
)

# Plotting average sentiment by genre (sorted so bars read low -> high)
genre_sentiment.sort_values().plot(kind='barh', figsize=(10, 6), color='purple')
plt.title('Average Sentiment by genres', fontsize=16)
plt.xlabel('Average Sentiment', fontsize=12)
plt.ylabel('genres', fontsize=12)
plt.show()
In [18]:
# Show the first two rows to confirm the 'sentiment' column is present.
movies.head(2)
Out[18]:
| title_x | imdb_id | poster_path | wiki_link | title_y | original_title | is_adult | year_of_release | runtime | genres | imdb_rating | imdb_votes | story | summary | tagline | actors | wins_nominations | release_date | sentiment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Uri: The Surgical Strike | tt8291224 | https://upload.wikimedia.org/wikipedia/en/thum... | https://en.wikipedia.org/wiki/Uri:_The_Surgica... | Uri: The Surgical Strike | Uri: The Surgical Strike | 0 | 2019 | 138 | Action|Drama|War | 8.4 | 35112 | Divided over five chapters the film chronicle... | Indian army special forces execute a covert op... | NaN | Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... | 4 wins | 11 January 2019 (USA) | -0.221429 |
| 1 | Battalion 609 | tt9472208 | NaN | https://en.wikipedia.org/wiki/Battalion_609 | Battalion 609 | Battalion 609 | 0 | 2019 | 131 | War | 4.1 | 73 | The story revolves around a cricket match betw... | The story of Battalion 609 revolves around a c... | NaN | Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen... | NaN | 11 January 2019 (India) | 0.233333 |
In [19]:
pip install plotly
Requirement already satisfied: plotly in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (6.0.1) Requirement already satisfied: narwhals>=1.15.1 in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (from plotly) (1.36.0) Requirement already satisfied: packaging in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (from plotly) (25.0) Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.0 -> 25.0.1 [notice] To update, run: C:\Users\lenovo\Downloads\snscrape_project\snscrape-env\Scripts\python.exe -m pip install --upgrade pip
In [32]:
import plotly.express as px
# Rows that have every field the plot needs, limited to the first 40 matches.
#
# The subset is bound to a NEW name. Rebinding `movies` here would
# permanently replace the full dataset with this 40-row slice, so every
# other cell would silently see different data depending on whether this
# cell had already been run.
movies_scatter = movies.dropna(
    subset=['imdb_rating', 'imdb_votes', 'wins_nominations']
).head(40)

# NOTE(review): in the rows displayed earlier 'wins_nominations' holds text
# such as "4 wins", so the z axis is presumably categorical rather than
# numeric. Confirm whether a parsed count was intended.
fig = px.scatter_3d(
    movies_scatter,
    x='imdb_rating',
    y='imdb_votes',
    z='wins_nominations',
    color='imdb_votes',
    hover_name='title_x',
    title='Interactive 3D Scatter: imdb_rating vs imdb_votes vs wins_nominations',
    color_continuous_scale='RdBu'
)
fig.show()
In [33]:
# Show the first row of `movies` as it currently stands.
movies.head(1)
Out[33]:
| title_x | imdb_id | poster_path | wiki_link | title_y | original_title | is_adult | year_of_release | runtime | genres | imdb_rating | imdb_votes | story | summary | tagline | actors | wins_nominations | release_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Uri: The Surgical Strike | tt8291224 | https://upload.wikimedia.org/wikipedia/en/thum... | https://en.wikipedia.org/wiki/Uri:_The_Surgica... | Uri: The Surgical Strike | Uri: The Surgical Strike | 0 | 2019 | 138 | Action|Drama|War | 8.4 | 35112 | Divided over five chapters the film chronicle... | Indian army special forces execute a covert op... | NaN | Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... | 4 wins | 11 January 2019 (USA) |
In [34]:
# Print the column labels currently present on `movies`.
print(movies.columns)
Index(['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y',
'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres',
'imdb_rating', 'imdb_votes', 'story', 'summary', 'tagline', 'actors',
'wins_nominations', 'release_date'],
dtype='object')
In [35]:
import plotly.graph_objects as go
import pandas as pd
# Keep only the rows that have all three fields used by the box traces.
movies_clean = movies.dropna(
    subset=['imdb_rating', 'wins_nominations', 'year_of_release']
)

# One entry per overlaid box trace:
# (y column, x column, legend name, marker colour).
# NOTE(review): 'wins_nominations' appears as text (e.g. "4 wins") in the
# displayed rows, so traces using it are presumably categorical. Confirm.
box_specs = [
    ('imdb_rating', 'year_of_release', 'IMDb vs Year', 'orange'),
    ('wins_nominations', 'year_of_release', 'Wins vs Year', 'skyblue'),
    ('imdb_rating', 'wins_nominations', 'IMDb vs Wins', 'green'),
]

# All traces share one 2D figure; they are added in the order listed above.
fig = go.Figure()
for y_col, x_col, trace_name, colour in box_specs:
    fig.add_trace(
        go.Box(
            y=movies_clean[y_col],
            x=movies_clean[x_col],
            name=trace_name,
            boxpoints='outliers',
            marker_color=colour,
        )
    )

layout_options = {
    'title': '3D-style Box Plot of IMDb, Awards, and Year',
    'xaxis_title': 'X Axis',
    'yaxis_title': 'Y Axis',
    'showlegend': True,
    'template': 'plotly_dark',
}
fig.update_layout(**layout_options)
fig.show()
In [37]:
import plotly.express as px
import pandas as pd
# Columns that must be present for a row to be plotted.
required_columns = ['title_x', 'imdb_rating', 'wins_nominations', 'year_of_release', 'story']

# Drop incomplete rows, then keep the last ten that remain.
movies_clean = movies.dropna(subset=required_columns).tail(10)

# Human-readable names used for axis, legend and hover labels.
display_labels = {
    'imdb_rating': 'IMDb Rating',
    'wins_nominations': 'Wins/Nominations',
    'year_of_release': 'Release Year',
    'title_x': 'Title',
    'story': 'Story',
}

# Build the 3D scatter; marker colour follows the rating, and the title and
# story are shown on hover.
fig = px.scatter_3d(
    movies_clean,
    x='imdb_rating',
    y='wins_nominations',
    z='year_of_release',
    color='imdb_rating',
    hover_data=['title_x', 'story'],
    title='3D Movie Visualization: Rating, Awards, and Release Year',
    labels=display_labels,
    color_continuous_scale='Viridis',
)

# Marker styling first, then scene axis titles and the dark template.
fig.update_traces(marker={'size': 5, 'opacity': 0.8})
scene_titles = {
    'xaxis_title': 'IMDb Rating',
    'yaxis_title': 'Wins/Nominations',
    'zaxis_title': 'Year of Release',
}
fig.update_layout(scene=scene_titles, template='plotly_dark')
fig.show()
In [ ]:
In [ ]: